###### Clustered Standard Errors ######
### Author: Nils-Christian Bormann
### Adapted from code by Daina Chiba
### This version: 30 Nov 2012
### Task: Program a function that computes clustered standard errors after GLMs

### Function to calculate clustered s.e. for logit/probit model
cluster <- function(fit=NA, y=NA, cluster=NA, link="Logit"){         # fitted object from glm
	
	# check input and define error messages
	if(!is.list(fit)) stop("Error: GLM output has wrong format. Must be class 'GLM' and structure 'list'. ")
	if(length(cluster)==1){if(is.na(cluster)) stop("Error: Cluster variable not defined.")} 
	if(length(cluster) != dim(fit$model)[1]) stop("Error: Cluster variable and data vector are of different length.")
	if(!is.element(link, c("Logit", "Probit"))) stop("Error: Functional form misspelled or unknown.")
	if(length(y)==1){if(is.na(y)) stop("Error: Response variable y not defined.")}
	
	# Define functional form
	if(link=="Logit"){
		F <- function(x){1/(1+exp(-x))}       # logit CDF
		f <- function(x){exp(x)/(1+exp(x))^2} # logit PDF
	}
	
	if(link=="Probit"){
		F <- function(x){pnorm(x)}
		f <- function(x){dnorm(x)}
	}
	
	# Define parameters
	fml <- fit$formula # model formula
	data <- fit$model
	beta <- fit$coef # estimated parameters
	vcov <- vcov(fit) # Variance-Covariance Matrix
	k <- length(beta) # number of estimated parameters
	m <- dim(table(cluster))    # N of clusters	
	

	xvars <- names(beta)            # Name of covariates ordered accordingly
	xvars <- xvars[2:length(xvars)] # Delete (Intercept)
	
	# Delete Missing values
	data <- as.data.frame(cbind(y, data[,xvars], cluster))
	names(data) <- c("y", xvars, "cluster")
	data <- na.omit(data)
	y <- data$y
	cluster <- data$cluster
	
	xmat <- as.matrix(data[,xvars]) # Design matrix
	xmat <- cbind(1, xmat)          # Add intercept
	xb <- xmat %*% beta             # linear predictor (xb)
	
	
	## Now, obtain clustered s.e.
	u <- ((y==1) * f(xb)/F(xb) + (y==0) * -f(xb)/(1-F(xb)))[,1] * xmat
	u.clust <- matrix(NA, nrow=m, ncol=k)
	
	fc <- factor(cluster)

	# for each covariate (column) sum over clusters (in rows)
	u.clust <- apply(u,2,function(x) tapply(x, fc, sum)) ## sum over cluster i

	cl.vcov <- vcov %*% ((m/(m-1)) * t(u.clust)%*%(u.clust)) %*% vcov ## sandwich - isn't this Huber-White Robust Transformation (is it possible to only cluster)
	cl.se <- sqrt(diag(cl.vcov)) ## clustered s.e.	
	
	## Display Clustered Standard Errors
	rnd <- 3          # rounding point
	z <- beta/cl.se # z-values
	pval <- 2*(1-pnorm(abs(beta/cl.se))) # p-values
	cl.tbl <- cbind(beta = round(beta,rnd), # table
			se = round(cl.se,rnd),
			z = round(z,2),
			pval = round(pval,rnd))
	colnames(cl.tbl) <- c("coef", "s.e.", "z", "P>|z|")
	
	output <- list()
	output[[1]] <- cl.tbl
	output[[2]] <- cl.vcov
	names(output) <- c("reg_table", "vcov")
	

#	print("Results w/ Robust s.e. clustered by Dyad")

	return(output)	
}
